.libPaths(c("/samurlab1/Joshua/more_Rlib/", .libPaths()))
# Mature Neutrophil differential analysis
# will compare the 3 mature neutrophil clusters in single cell RNA-seq dataset
# replot in low dim space using HVF
library(Seurat)
## Loading required package: SeuratObject
## Loading required package: sp
##
## Attaching package: 'SeuratObject'
## The following objects are masked from 'package:base':
##
## intersect, t
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(clusterProfiler)
##
## clusterProfiler v4.12.6 Learn more at https://yulab-smu.top/contribution-knowledge-mining/
##
## Please cite:
##
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan,
## X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal
## enrichment tool for interpreting omics data. The Innovation. 2021,
## 2(3):100141
##
## Attaching package: 'clusterProfiler'
## The following object is masked from 'package:stats':
##
## filter
library(org.Hs.eg.db)
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following object is masked from 'package:SeuratObject':
##
## intersect
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, table,
## tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:clusterProfiler':
##
## rename
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:clusterProfiler':
##
## slice
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:sp':
##
## %over%
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:clusterProfiler':
##
## select
## The following object is masked from 'package:dplyr':
##
## select
##
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ ggplot2 3.5.1 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%() masks IRanges::%within%()
## ✖ IRanges::collapse() masks dplyr::collapse()
## ✖ Biobase::combine() masks BiocGenerics::combine(), dplyr::combine()
## ✖ IRanges::desc() masks dplyr::desc()
## ✖ tidyr::expand() masks S4Vectors::expand()
## ✖ clusterProfiler::filter() masks dplyr::filter(), stats::filter()
## ✖ S4Vectors::first() masks dplyr::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce() masks IRanges::reduce()
## ✖ S4Vectors::rename() masks clusterProfiler::rename(), dplyr::rename()
## ✖ lubridate::second() masks S4Vectors::second()
## ✖ lubridate::second<-() masks S4Vectors::second<-()
## ✖ AnnotationDbi::select() masks clusterProfiler::select(), dplyr::select()
## ✖ purrr::simplify() masks clusterProfiler::simplify()
## ✖ IRanges::slice() masks clusterProfiler::slice(), dplyr::slice()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(circlize)
## ========================================
## circlize version 0.4.16
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
##
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
## in R. Bioinformatics 2014.
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(circlize))
## ========================================
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:AnnotationDbi':
##
## select
##
## The following object is masked from 'package:IRanges':
##
## slice
##
## The following object is masked from 'package:S4Vectors':
##
## rename
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(monocle3)
## Loading required package: SingleCellExperiment
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
##
## The following objects are masked from 'package:Biobase':
##
## anyMissing, rowMedians
##
## The following object is masked from 'package:dplyr':
##
## count
##
##
## Attaching package: 'MatrixGenerics'
##
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
##
## The following object is masked from 'package:Biobase':
##
## rowMedians
##
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
##
## Attaching package: 'SummarizedExperiment'
##
## The following object is masked from 'package:Seurat':
##
## Assays
##
## The following object is masked from 'package:SeuratObject':
##
## Assays
##
##
## Attaching package: 'monocle3'
##
## The following objects are masked from 'package:Biobase':
##
## exprs, fData, fData<-, pData, pData<-
library(SeuratWrappers)
library(ggplot2)
library(EnhancedVolcano)
## Loading required package: ggrepel
#neutrophil single cell trajectory analysis
preneu.feats.mus = read.delim(file = "/samurlab1/Joshua/MM_scRNAseq/preneugenes.txt", sep ="")
preneu.feats.mus = preneu.feats.mus$x
#currently, features are in mouse symbol annotation, if we use as is it will coerce an error
#this function uses ensemble annotated features to convert gene names.
musGenes = preneu.feats.mus
mouse_human_genes = read.csv("http://www.informatics.jax.org/downloads/reports/HOM_MouseHumanSequence.rpt",sep="\t")
convert_mouse_to_human <- function(gene_list){
output = c()
for(gene in gene_list){
class_key = (mouse_human_genes %>% filter(Symbol == gene & Common.Organism.Name=="mouse, laboratory"))[['DB.Class.Key']]
if(!identical(class_key, integer(0)) ){
human_genes = (mouse_human_genes %>% filter(DB.Class.Key == class_key & Common.Organism.Name=="human"))[,"Symbol"]
for(human_gene in human_genes){
output = append(output,human_gene)
}
}
}
return (output)
}
preneu.homo = convert_mouse_to_human(as.vector(preneu.feats.mus))
neuts = AddModuleScore(neuts, features = list(preneu.homo), name = "preneu.sig")
## Warning: The following features are not present in the object: F10, ELDR,
## HSPB6, XKR5, not searching for symbol synonyms
FeaturePlot(neuts, features = "preneu.sig1", cols = c("steelblue1","firebrick3"), pt.size = 0.1)
FeaturePlot(neuts, features = "preneu.sig1", split.by="tissue" ,cols = c("steelblue1","firebrick3"), pt.size = 0.1)
## selecting .1 as a stringent cut off for origin local in minimum
spanning tree
neuts@reductions$umap@cell.embeddings = neuts@reductions$umap@cell.embeddings[,c(1,2)]
neuts$predicted = NULL
## Warning: Cannot find cell-level meta data named predicted
hist(neuts$preneu.sig1)
neuts$stem = neuts$preneu.sig1 > .1
# selecting .1 as a stringent cut off for origin local in minimum spanning tree
stemcells = colnames(neuts[,neuts$stem == TRUE])
####monocle3 for sc trajectories####
neuts.cds = as.cell_data_set(neuts)
## Warning: Monocle 3 trajectories require cluster partitions, which Seurat does
## not calculate. Please run 'cluster_cells' on your cell_data_set object
neuts.cds@rowRanges@elementMetadata@listData[["gene_short_name"]] = rownames(neuts.cds[["SCT"]])
neuts.cds = preprocess_cds(neuts.cds, num_dim = 50)
neuts.cds = cluster_cells(neuts.cds)
neuts.cds = learn_graph(neuts.cds)
## | | | 0% | |======================================================================| 100%
neuts.cds <- order_cells(neuts.cds, root_cells = stemcells)
# neuts.cds_pr_test_res = graph_test(neuts.cds, neighbor_graph="principal_graph", cores=64
plot_cells(neuts.cds,
color_cells_by = "pseudotime",
label_groups_by_cluster=FALSE,
label_leaves=FALSE,
label_branch_points=FALSE,
show_trajectory_graph = TRUE, label_principal_points = F, label_roots = T)
## Cells aren't colored in a way that allows them to be grouped.
#T1 signature####
neuts = AddModuleScore(neuts, features =list(c("CXCR2","CD300LD","DUSP1","GBP2","IFITM1","IL1B","ISG15",
"JAML","JUNB","MSRB1","OSM","S100A6","SELPLG","SLPI")),
name = "T1_Signature")
t1 = FeaturePlot(neuts, features = "T1_Signature1",
# split.by = "tissue",
cols = c("steelblue3","red"))
t1
t1 = VlnPlot(neuts, features = "T1_Signature1",
cols = patient_cols,
split.plot = T,
split.by = "tissue",
pt.size = 0)
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##
## This message will be shown once per session.
order = c("Mki67+DEFA3+pre-neut","S100A8+LTF+imtr_neut","LTF+CAMP+imtr_neut","S100A8/9+MMP9+imtr_neut","RETN+LCN2+mat_neut","TREM1+MME+mat_neut","TNFAIP3+CXCL8+mat_neut")
factor(t1$data$ident, levels=order)
t1$data$ident = factor(t1$data$ident, levels=order)
t1
#T2 signature####
neuts = AddModuleScore(neuts, features =list(c("LTC4S","MMP8","MMP9","PPIA","PRR13","PTMA","RETNLG",
"TNFAIP3","IRAK3","CYBB","CCRL2","OLR1","CLEC6A")),
name = "T2_Signature")
## Warning: The following features are not present in the object: RETNLG, not
## searching for symbol synonyms
t2 = FeaturePlot(neuts, features = "T2_Signature1",
# split.by = "tissue",
cols = c("steelblue3","red"))
t2
t2 = VlnPlot(neuts, features = "T2_Signature1",
cols = patient_cols,
split.plot = T,
split.by = "tissue",
pt.size = 0)
order = c("Mki67+DEFA3+pre-neut","S100A8+LTF+imtr_neut","LTF+CAMP+imtr_neut","S100A8/9+MMP9+imtr_neut","RETN+LCN2+mat_neut","TREM1+MME+mat_neut","TNFAIP3+CXCL8+mat_neut")
factor(t2$data$ident, levels=order)
t2$data$ident = factor(t2$data$ident, levels=order)
t2
#T3 signature####
neuts = AddModuleScore(neuts, features =list(c("ATF3","CCL3","CCL4","CD274","CSTB","CXCL3","HCAR2","HILPDA","HK2",
"HMOX1","IER3","JUN","LDHA","MIF","PLIN2","SPP1","TGIF1","TNFRSF10D",
"VEGFA","ZEB2")), name = "T3_Signature")
t3 = FeaturePlot(neuts, features = "T3_Signature1",
# split.by = "tissue",
cols = c("steelblue3","red"))
t3
t3 = VlnPlot(neuts, features = "T3_Signature1",
cols = patient_cols,
split.plot = T,
split.by = "tissue",
pt.size = 0)
order = c("Mki67+DEFA3+pre-neut","S100A8+LTF+imtr_neut","LTF+CAMP+imtr_neut","S100A8/9+MMP9+imtr_neut","RETN+LCN2+mat_neut","TREM1+MME+mat_neut","TNFAIP3+CXCL8+mat_neut")
factor(t3$data$ident, levels=order)
t3$data$ident = factor(t3$data$ident, levels=order)
t3
neuts.mat.sub = subset(neuts, idents = c("TREM1+MME+mat_neut","TNFAIP3+CXCL8+mat_neut","RETN+LCN2+mat_neut"))
DimPlot(neuts.mat.sub)
mature.neut.norm = as.matrix(neuts.mat.sub$tissue == "BM")
# mature.neut.norm
# FALSE TRUE
# 23913 11935
mature.neut.norm.T = mature.neut.norm[mature.neut.norm %in% TRUE,]
length(mature.neut.norm.T)
## [1] 11935
mature.neut.norm.barcodes = names(mature.neut.norm.T)
# 11935
mature.neut.lesion = as.matrix(neuts.mat.sub$tissue == "OL")
# table(mature.neut.lesion)
# FALSE TRUE
# 11935 23913
mature.neut.lesion.T = mature.neut.lesion[mature.neut.lesion %in% TRUE,]
length(mature.neut.lesion.T)
## [1] 23913
mature.neut.lesion.barcodes = names(mature.neut.lesion.T)
# 23913
z = FindMarkers(neuts.mat.sub, ident.1 = mature.neut.lesion.barcodes, ident.2 = mature.neut.norm.barcodes, min.pct = 0.1)
# View(z)
#pval sort
z.ord = z[order(z$avg_log2FC,decreasing = T),]
head(z.ord)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## VEGFA 2.489244e-155 2.954315 0.115 0.032 5.176133e-151
## IER3 1.693558e-264 1.979016 0.322 0.172 3.521584e-260
## BHLHE40 1.461669e-88 1.729294 0.100 0.040 3.039394e-84
## NDRG1 1.714661e-89 1.638819 0.136 0.067 3.565465e-85
## MAFF 1.090737e-108 1.621894 0.113 0.043 2.268077e-104
## DDIT4 8.697295e-124 1.582138 0.152 0.067 1.808516e-119
EnhancedVolcano(z.ord, lab = rownames(z), x = "avg_log2FC", y = "p_val_adj",
pCutoff = 10e-50, FCcutoff = 1.15, title = "OL left vs BM right")